import asyncio
import datetime
import os
import random

from apps.database import save_ods, mongo
from apps.global_config import configs


async def run():
    """Register the ``.txt`` files under five hard-coded backup folders.

    For every directory that ``os.walk`` reaches below each configured root,
    the ``.txt`` files are grouped by the part of the file name before the
    first dot.  When the last two characters of that part are digits they are
    dropped from the group key, so ``foo01.txt`` and ``foo02.txt`` both land
    in the group ``foo``.  Each group becomes one record dict that is printed
    and passed to ``save_ods`` together with an ``{"_id": ...}`` dict.

    The counter starts at 1 and is shared by all roots, so the generated ids
    keep increasing from one root to the next.

    NOTE(review): ``save_ods`` is presumably an upsert keyed on its first
    argument -- confirm against ``apps.database``.
    """
    mongo.init(configs.MONGO_URI)

    # Every occurrence of this root is removed from the stored file paths.
    backup_root = r"G:\备份\扫叶提供维普数据2023.08"
    # Each entry is (directory to walk, source_type, type_id).
    roots = [
        (r"G:\备份\扫叶提供维普数据2023.08\04-1經類", 4, 1),
        (r"G:\备份\扫叶提供维普数据2023.08\04-2史類", 4, 2),
        (r"G:\备份\扫叶提供维普数据2023.08\04-3子類", 4, 3),
        (r"G:\备份\扫叶提供维普数据2023.08\04-4集類", 4, 4),
        (r"G:\备份\扫叶提供维普数据2023.08\02地名", 3, 0),
    ]

    count = 1
    for root_dir, source_type, type_id in roots:
        for current_dir, _sub_dirs, file_names in os.walk(root_dir, topdown=True):
            # Keep only files whose last dot-separated component is "txt".
            txt_names = [
                name for name in file_names
                if name.split(".")[-1].lower() == 'txt'
            ]

            # Group the names of this directory; insertion order is kept.
            grouped = {}
            for name in txt_names:
                stem = name.split('.')[0]
                group_key = stem[:-2] if stem[-2:].isdigit() else stem
                grouped.setdefault(group_key, []).append(name)

            for group_key, members in grouped.items():
                print(group_key)
                relative_paths = []
                for member in members:
                    full_path = os.path.join(current_dir, member)
                    relative_paths.append(
                        full_path.replace(backup_root, "").lstrip("\\")
                    )
                print(relative_paths)

                # Four-character, zero-padded serial shared by all id fields.
                serial = str(count).rjust(4, '0')
                now = datetime.datetime.now()
                record = {
                    "_id": "1" + serial,
                    "lngid": "1" + serial,
                    "rawid": serial,
                    "process_id": str(random.randint(1, 999)).rjust(3, '0'),
                    "source_type": source_type,
                    "type_id": type_id,
                    "file_name": relative_paths,
                    "latest_date": now.strftime("%Y%m%d%H%M%S%f"),
                    "upload_time": now,
                    "status": 0,
                    "error_info": "",
                }
                print(record)
                count += 1
                await save_ods({"_id": record["_id"]}, record)


async def run2():
    """Register the ``.txt`` files found below the ``03日历`` backup folder.

    Files of each walked directory are grouped by the part of the name before
    the first dot, with the last two characters removed from the key when they
    are digits.  One record per group is printed and passed to ``save_ods``
    with ``source_type`` 2 and ``type_id`` 0.  The id counter starts at 62.

    NOTE(review): the start value 62 presumably continues the ids produced by
    an earlier import -- confirm before re-running.
    """
    mongo.init(configs.MONGO_URI)

    # Every occurrence of this root is removed from the stored file paths.
    backup_root = r"G:\备份\扫叶提供维普数据2023.08"
    start_dir = r"G:\备份\扫叶提供维普数据2023.08\03日历"

    count = 62
    for current_dir, _sub_dirs, file_names in os.walk(start_dir, topdown=True):
        # Keep only files whose last dot-separated component is "txt".
        txt_names = [
            name for name in file_names
            if name.split(".")[-1].lower() == 'txt'
        ]

        # Group the names of this directory; insertion order is kept.
        grouped = {}
        for name in txt_names:
            stem = name.split('.')[0]
            group_key = stem[:-2] if stem[-2:].isdigit() else stem
            grouped.setdefault(group_key, []).append(name)

        for group_key, members in grouped.items():
            print(group_key)
            relative_paths = []
            for member in members:
                full_path = os.path.join(current_dir, member)
                relative_paths.append(
                    full_path.replace(backup_root, "").lstrip("\\")
                )
            print(relative_paths)

            # Four-character, zero-padded serial shared by all id fields.
            serial = str(count).rjust(4, '0')
            now = datetime.datetime.now()
            record = {
                "_id": "1" + serial,
                "lngid": "1" + serial,
                "rawid": serial,
                "process_id": str(random.randint(1, 999)).rjust(3, '0'),
                "source_type": 2,
                "type_id": 0,
                "file_name": relative_paths,
                "latest_date": now.strftime("%Y%m%d%H%M%S%f"),
                "upload_time": now,
                "status": 0,
                "error_info": "",
            }
            print(record)
            count += 1
            await save_ods({"_id": record["_id"]}, record)


async def run3():
    """Register the ``.txt`` files found below the ``04-2史類2`` backup folder.

    Unlike the sibling routines, files are grouped by the second-to-last
    backslash-separated component of their full path (the containing
    directory's name on Windows-style paths).  In the stored paths the backup
    root is removed and ``04-2史類2`` is rewritten to ``04-2史類``.  One record
    per group is printed and passed to ``save_ods`` with ``source_type`` 4 and
    ``type_id`` 2.  The id counter starts at 65.
    """
    mongo.init(configs.MONGO_URI)

    # Every occurrence of this root is removed from the stored file paths.
    backup_root = r"G:\备份\扫叶提供维普数据2023.08"
    start_dir = r"G:\备份\扫叶提供维普数据2023.08\04-2史類2"

    count = 65
    for current_dir, _sub_dirs, file_names in os.walk(start_dir, topdown=True):
        # Full paths of the files whose last dot-separated component is "txt".
        txt_paths = [
            os.path.join(current_dir, name)
            for name in file_names
            if name.split(".")[-1].lower() == 'txt'
        ]

        # Group by the path component just before the file name.
        grouped = {}
        for full_path in txt_paths:
            parent_name = full_path.split('\\')[-2]
            grouped.setdefault(parent_name, []).append(full_path)

        for parent_name, members in grouped.items():
            print(parent_name)
            relative_paths = []
            for member in members:
                # ``member`` was already joined with ``current_dir`` above.
                # NOTE(review): on Windows os.path.join should return an
                # absolute second argument unchanged, making this join a
                # no-op -- confirm before removing it.
                rejoined = os.path.join(current_dir, member)
                without_root = rejoined.replace(backup_root, "")
                renamed = without_root.replace('04-2史類2', '04-2史類')
                relative_paths.append(renamed.lstrip("\\"))
            print(relative_paths)

            # Four-character, zero-padded serial shared by all id fields.
            serial = str(count).rjust(4, '0')
            now = datetime.datetime.now()
            record = {
                "_id": "1" + serial,
                "lngid": "1" + serial,
                "rawid": serial,
                "process_id": str(random.randint(1, 999)).rjust(3, '0'),
                "source_type": 4,
                "type_id": 2,
                "file_name": relative_paths,
                "latest_date": now.strftime("%Y%m%d%H%M%S%f"),
                "upload_time": now,
                "status": 0,
                "error_info": "",
            }
            print(record)
            count += 1
            await save_ods({"_id": record["_id"]}, record)


async def run4():
    """Register the ``.txt`` files found below the ``01人名`` backup folder.

    Files of each walked directory are grouped by the part of the name before
    the first dot (no digit suffix is stripped here).  One record per group is
    printed and passed to ``save_ods`` with ``source_type`` 1 and ``type_id``
    0.  The id counter starts at 89.
    """
    mongo.init(configs.MONGO_URI)

    # Every occurrence of this root is removed from the stored file paths.
    backup_root = r"G:\备份\扫叶提供维普数据2023.08"
    start_dir = r"G:\备份\扫叶提供维普数据2023.08\01人名"

    count = 89
    for current_dir, _sub_dirs, file_names in os.walk(start_dir, topdown=True):
        # Keep only files whose last dot-separated component is "txt".
        txt_names = [
            name for name in file_names
            if name.split(".")[-1].lower() == 'txt'
        ]

        # Group the names of this directory; insertion order is kept.
        grouped = {}
        for name in txt_names:
            grouped.setdefault(name.split('.')[0], []).append(name)

        for members in grouped.values():
            relative_paths = []
            for member in members:
                full_path = os.path.join(current_dir, member)
                relative_paths.append(
                    full_path.replace(backup_root, "").lstrip("\\")
                )

            # Four-character, zero-padded serial shared by all id fields.
            serial = str(count).rjust(4, '0')
            now = datetime.datetime.now()
            record = {
                "_id": "1" + serial,
                "lngid": "1" + serial,
                "rawid": serial,
                "process_id": str(random.randint(1, 999)).rjust(3, '0'),
                "source_type": 1,
                "type_id": 0,
                "file_name": relative_paths,
                "latest_date": now.strftime("%Y%m%d%H%M%S%f"),
                "upload_time": now,
                "status": 0,
                "error_info": "",
            }
            print(record)
            count += 1
            await save_ods({"_id": record["_id"]}, record)

if __name__ == "__main__":
    # Only run4 is executed when this file is run as a script; run, run2 and
    # run3 are defined in this file but are not invoked from here.
    asyncio.run(run4())
